import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
def collect_genres(genre_lists):
    """Return the sorted, de-duplicated genre names found in *genre_lists*.

    Args:
        genre_lists: Iterable with one entry per movie. Each entry is
            expected to be a list (or tuple) of genre strings. Any other
            entry -- such as the NaN/None that ``Series.str.split`` leaves
            for a missing value -- is skipped.

    Returns:
        A list of the unique genre names in ascending order.
    """
    unique_genres = set()
    for genres in genre_lists:
        # A missing Genre cell arrives as NaN (a float) or None; iterating
        # it would raise TypeError, so only real sequences are consumed.
        if isinstance(genres, (list, tuple)):
            unique_genres.update(genres)
    # Iteration order of a set of strings may differ between interpreter
    # runs; sorting keeps the printed result reproducible.
    return sorted(unique_genres)


# Guarded so that importing this module does not read the CSV or print.
if __name__ == "__main__":
    file_path = './IMDB-Movie-Data.csv'
    df = pd.read_csv(file_path)
    # One list of genre names per row, e.g. "A,B" -> ["A", "B"].
    temp_list = df["Genre"].str.split(',').tolist()
    # print(temp_list)
    genre_list = collect_genres(temp_list)
    print(genre_list)
# zero_df = pd.DataFrame(np.zeros((df.shape[0],len(genre_list))),columns = genre_list)
# for i in range(df.shape[0]):
#     #zero_df.loc[0,['sci_fi','Mucical']]=1
#     zero_df.loc[i,temp_list[i]] = 1
# genre_count = zero_df.sum(axis=0)
# # Sort genres by count (ascending)
# genre_count = genre_count.sort_values()
# x = genre_count.index
# y = genre_count.values
# # Draw the bar chart
# plt.figure(figsize=(20,8),dpi=80)
# plt.bar(x,y)
# plt.show()